/*
Obtain the gendered amenity classifications and add them to the analysis samples
	// input: amenities_gender_sample, clauselabels, estab_union_act, filledpanel
	// output: amenity_value_gender, amenities_classification_DD
	// modifies: estab_union_act and filldepanel
 
Uses 6 methods to get at female/male centric amenities
	o	2 samples: a random split-sample or the full sample of establishments
	o	3 ways of calculating the PageRank gender gaps
		-	Difference in values (without normalization)
		-	Difference in normalized values: chooses the establishment among those with the smallest gap in pay premiums
		-	Difference in normalized rank index, i.e., from 0 to 100

Adds the clause counts from the data driven approach into the new contracts sample and the filled panel
	o	DDbase: using the difference in normalized values from the random split-sample
	o	DDall: clauses that are identified as female/male-centric regardless of the method used
	o	DDany: clauses that are identified as female/male-centric in any of the method used
*/

cap log close
cap log using "$logs/amenity_value_coeffs_gender_log", replace


*************
**LOAD DATA**
*************

* Sample for data-driven classification of clauses
use "$files\amenities_gender_sample", clear

* PageRank normalizations
gen fe_diff = abs(fe_m-fe_f)
gen pagerank_diff = abs(pagerank_m-pagerank_f)
qui sum fe_diff, d
list estabid fe_m fe_f fe_diff pagerank_m pagerank_f pagerank_diff if fe_diff==r(min)
qui sum pagerank_diff if fe_diff==r(min)
list estabid fe_m fe_f fe_diff pagerank_m pagerank_f pagerank_diff if pagerank_diff==r(min)
gen Diff = pagerank_diff if pagerank_diff==r(min)
gen X = pagerank_m if pagerank_diff==r(min)
gen Ratio = pagerank_m/pagerank_f if pagerank_diff==r(min)
gen B = Diff/(X*(Ratio-1))
gen A = (Diff/X)+B
gen pagerank_m_norm = pagerank_m
sum Ratio
gen pagerank_f_norm = pagerank_f*r(mean)

* PageRanks: logs of exp(V)
gen ln_pagerank_m = log(pagerank_m)
gen ln_pagerank_f = log(pagerank_f)
gen ln_pagerank_m_norm = log(pagerank_m_norm)
gen ln_pagerank_f_norm = log(pagerank_f_norm)

* Inverse hyporbolic sine of clauses
foreach x of varlist *cl_* {
	replace `x' = asinh(`x')
}

* Restrictions
//keep estab with at least 4 sectoral contracts over time frame considered
egen tag_estabid = tag(estabid)
sum cnt, d
tab cnt if tag_estabid==1
keep if cnt>=4
//random sample split
splitsample, generate(sample) nsplit(2) rseed(1234)
sort estabid

* Some descriptives
// gap in page ranks (with and without normalization)
gen gap_pagerank = ln_pagerank_f - ln_pagerank_m
gen gap_pagerank_norm = ln_pagerank_f_norm - ln_pagerank_m_norm
// index ranking of PageRanks (minimum at 0; maximum at 100)
qui sum pagerank_m
gen pagerank_m_index = (pagerank_m-r(min))*(100/(r(max)-r(min)))
qui sum pagerank_f
gen pagerank_f_index = (pagerank_f-r(min))*(100/(r(max)-r(min)))
gen gap_pagerank_index = pagerank_f_index - pagerank_m_index
//correlation of gaps
corr gap_pagerank*

* Clause labeling
//label clauses 
do "$raw/clauselabels.do" 	
//collinearity in clauses
sum cl_43pro_pro cl_51aut_tra	
drop cl_43pro_pro cl_51aut_tra	

* Geography and industry
gen uf = floor(municipality/1e4)
gen region = floor(municipality/1e5)
gen ind1 = floor(cnae20subcl/1e6)
gen ind2 = floor(cnae20subcl/1e5)
gen ind3 = floor(cnae20subcl/1e4)
	
	
**********************************************
*** select clauses with data driven method ***
**********************************************

** LASSO ON VALUE DIFF (method1)**
	
	* Adaptive lasso
	lasso linear gap_pagerank fe_f fe_m cl_* if sample==1, selection(adaptive) rseed(1234)
	estimates store method1
	matrix b_method1 = e(b_standardized)'
	lassocoef, display(coef, standardized) sort(coef, standardized)
	local selected_vars_m1 "`e(allvars_sel)'"

	* Evaluate
	lassocoef method1, sort(coef, standardized) nofvlabel
	lassogof method1, over(sample) postselection
	qui sum gap_pagerank if sample==1
	di "Variance in sample==1: `r(Var)'"
	qui sum gap_pagerank if sample==2
	di "Variance in sample==2: `r(Var)'"
	
	* Post-selection OLS (with geography and industry fixed effects)
	reghdfe gap_pagerank `selected_vars_m1' if sample==1, absorb(uf ind2)
	estimates store method1_ols 
	matrix b_method1_ols = e(b)'	

	* Export
	putexcel set "$tables\amenity_value_gender.xlsx", sheet(method1) replace
	putexcel A1=matrix(b_method1), names nformat(number_d2)
	putexcel A1 = "Clause"
	putexcel B1 = "Coef (standardized)"	
	
	putexcel D1=matrix(b_method1_ols), names nformat(number_d2)
	putexcel D1 = "Clause"
	putexcel E1 = "Coef (OLS)"	
	
	* Sanity check: see if something changes by flipping difference (it doesn't, as it should)
	gen flip_gap = - gap_pagerank
	lasso linear flip_gap fe_f fe_m cl_* if sample==1, selection(adaptive) rseed(1234)
	lassocoef, display(coef, standardized) sort(coef, standardized)
	drop flip_gap
	
** LASSO ON VALUE DIFF, ENTIRE SAMPLE --> just to check the difference **
	
	* Adaptive lasso
	lasso linear gap_pagerank fe_f fe_m cl_*, selection(adaptive) rseed(1234)
	estimates store method1_v2
	matrix b_method1 = e(b_standardized)'
	lassocoef, display(coef, standardized) sort(coef, standardized)
	local selected_vars_m1 "`e(allvars_sel)'"

	* Export
	putexcel set "$tables\amenity_value_gender.xlsx", sheet(method1_v2) modify
	putexcel A1=matrix(b_method1), names nformat(number_d2)
	putexcel A1 = "Clause"
	putexcel B1 = "Coef (standardized)"	
	
** LASSO ON NORMALIZED VALUE DIFF (method2)**
	
	* Adaptive lasso
	lasso linear gap_pagerank_norm fe_f fe_m cl_* if sample==1, selection(adaptive) rseed(1234)
	estimates store method2
	matrix b_method2 = e(b_standardized)'
	lassocoef, display(coef, standardized) sort(coef, standardized)
	local selected_vars_m2 "`e(allvars_sel)'"

	* Evaluate
	lassocoef method2, sort(coef, standardized) nofvlabel
	lassogof method2, over(sample) postselection
	qui sum gap_pagerank_norm if sample==1
	di "Variance in sample==1: `r(Var)'"
	qui sum gap_pagerank_norm if sample==2
	di "Variance in sample==2: `r(Var)'"
	
	* Post-selection OLS (with geography and industry fixed effects)
	reghdfe gap_pagerank_norm `selected_vars_m2' if sample==1, absorb(uf ind2)
	estimates store method2_ols 
	matrix b_method2_ols = e(b)'	

	* Export
	putexcel set "$tables\amenity_value_gender.xlsx", sheet(method2) modify
	putexcel A1=matrix(b_method2), names nformat(number_d2)
	putexcel A1 = "Clause"
	putexcel B1 = "Coef (standardized)"	
	
	putexcel D1=matrix(b_method2_ols), names nformat(number_d2)
	putexcel D1 = "Clause"
	putexcel E1 = "Coef (OLS)"	
	
	* Sanity check: see if something changes by flipping difference (it doesn't, as it should)
	gen flip_gap = - gap_pagerank_norm
	lasso linear flip_gap fe_f fe_m cl_* if sample==1, selection(adaptive) rseed(1234)
	lassocoef, display(coef, standardized) sort(coef, standardized)
	drop flip_gap
	
** LASSO ON NORMALIZED VALUE DIFF, ENTIRE SAMPLE --> just to check the difference **
	
	* Adaptive lasso
	lasso linear gap_pagerank_norm fe_f fe_m cl_*, selection(adaptive) rseed(1234)
	estimates store method2_v2
	matrix b_method2 = e(b_standardized)'
	lassocoef, display(coef, standardized) sort(coef, standardized)
	local selected_vars_m2 "`e(allvars_sel)'"

	* Export
	putexcel set "$tables\amenity_value_gender.xlsx", sheet(method2_v2) modify
	putexcel A1=matrix(b_method2), names nformat(number_d2)
	putexcel A1 = "Clause"
	putexcel B1 = "Coef (standardized)"	
	
** LASSO ON INDEX DIFF (method3)**
	
	* Adaptive lasso
	lasso linear gap_pagerank_index fe_f fe_m cl_* if sample==1, selection(adaptive) rseed(1234)
	estimates store method3
	matrix b_method3 = e(b_standardized)'
	lassocoef, display(coef, standardized) sort(coef, standardized)
	local selected_vars_m3 "`e(allvars_sel)'"

	* Evaluate
	lassocoef method3, sort(coef, standardized) nofvlabel
	lassogof method3, over(sample) postselection
	qui sum gap_pagerank_index if sample==1
	di "Variance in sample==1: `r(Var)'"
	qui sum gap_pagerank_index if sample==2
	di "Variance in sample==2: `r(Var)'"
	
	* Post-selection OLS (with geography and industry fixed effects)
	reghdfe gap_pagerank_index `selected_vars_m3' if sample==1, absorb(uf ind2)
	estimates store method3_ols 
	matrix b_method3_ols = e(b)'	

	* Export
	putexcel set "$tables\amenity_value_gender.xlsx", sheet(method3) modify
	putexcel A1=matrix(b_method3), names nformat(number_d2)
	putexcel A1 = "Clause"
	putexcel B1 = "Coef (standardized)"	
	
	putexcel D1=matrix(b_method3_ols), names nformat(number_d2)
	putexcel D1 = "Clause"
	putexcel E1 = "Coef (OLS)"	
	
	* Sanity check: see if something changes by flipping difference (it doesn't, as it should)
	gen flip_gap = - gap_pagerank_index
	lasso linear flip_gap fe_f fe_m cl_* if sample==1, selection(adaptive) rseed(1234)
	lassocoef, display(coef, standardized) sort(coef, standardized)
	drop flip_gap
	
** LASSO ON INDEX DIFF, ENTIRE SAMPLE --> just to check the difference **
	
	* Adaptive lasso
	lasso linear gap_pagerank_index fe_f fe_m cl_*, selection(adaptive) rseed(1234)
	estimates store method3_v2
	matrix b_method3 = e(b_standardized)'
	lassocoef, display(coef, standardized) sort(coef, standardized)
	local selected_vars_m3 "`e(allvars_sel)'"

	* Export
	putexcel set "$tables\amenity_value_gender.xlsx", sheet(method3_v2) modify
	putexcel A1=matrix(b_method3), names nformat(number_d2)
	putexcel A1 = "Clause"
	putexcel B1 = "Coef (standardized)"	
	

********************************************
*** save clauses with data driven method ***
********************************************

* Method 1: values
import excel using "$tables\amenity_value_gender.xlsx", sheet(method1) firstrow clear
keep  Clause Coef*
rename (Clause Coefstandardized CoefOLS) (clause coef coefols)
drop if inlist( clause, "fe_f", "fe_m", "_cons")
//correlation between LASSO ranking and OLS with FEs (all LASSO selected clauses)//
pwcorr coef coefols
spearman coef coefols
gsort - coef 
gen aux = _n 
gen isfemale_amenity_DD_m1 = (aux<=20)
gsort + coef 
drop aux
gen aux = _n 
gen ismale_amenity_DD_m1 = (aux<=20)
drop if (isfemale_amenity_DD_m1==0)&(ismale_amenity_DD_m1==0)
//correlation between LASSO ranking and OLS with FEs (female and male DD amenities)//
pwcorr coef coefols
spearman coef coefols
keep clause is*
duplicates drop
save "$files/amenities_classification_DD", replace
//method1_v2
import excel using "$tables\amenity_value_gender.xlsx", sheet(method1_v2) firstrow clear
keep  Clause Coef*
rename (Clause Coefstandardized) (clause coef)
drop if inlist( clause, "fe_f", "fe_m", "_cons")
gsort - coef 
gen aux = _n 
gen isfemale_amenity_DD_m1v2 = (aux<=20)
gsort + coef 
drop aux
gen aux = _n 
gen ismale_amenity_DD_m1v2 = (aux<=20)
drop if (isfemale_amenity_DD_m1v2==0)&(ismale_amenity_DD_m1v2==0)
keep clause is*
duplicates drop
merge 1:1 clause using "$files/amenities_classification_DD"
drop _merge
save "$files/amenities_classification_DD", replace

* Method 2: normalized values
import excel using "$tables\amenity_value_gender.xlsx", sheet(method2) firstrow clear
keep  Clause Coef*
rename (Clause Coefstandardized CoefOLS) (clause coef coefols)
drop if inlist( clause, "fe_f", "fe_m", "_cons")
//correlation between LASSO ranking and OLS with FEs (all LASSO selected clauses)//
pwcorr coef coefols
spearman coef coefols
gsort - coef 
gen aux = _n 
gen isfemale_amenity_DD_m2 = (aux<=20)
gsort + coef 
drop aux
gen aux = _n 
gen ismale_amenity_DD_m2 = (aux<=20)
drop if (isfemale_amenity_DD_m2==0)&(ismale_amenity_DD_m2==0)
//correlation between LASSO ranking and OLS with FEs (female and male DD amenities)//
pwcorr coef coefols
spearman coef coefols
keep clause is*
duplicates drop
merge 1:1 clause using "$files/amenities_classification_DD"
drop _merge
save "$files/amenities_classification_DD", replace
//method2_v2
import excel using "$tables\amenity_value_gender.xlsx", sheet(method2_v2) firstrow clear
keep  Clause Coef*
rename (Clause Coefstandardized) (clause coef)
drop if inlist( clause, "fe_f", "fe_m", "_cons")
gsort - coef 
gen aux = _n 
gen isfemale_amenity_DD_m2v2 = (aux<=20)
gsort + coef 
drop aux
gen aux = _n 
gen ismale_amenity_DD_m2v2 = (aux<=20)
drop if (isfemale_amenity_DD_m2v2==0)&(ismale_amenity_DD_m2v2==0)
keep clause is*
duplicates drop
merge 1:1 clause using "$files/amenities_classification_DD"
drop _merge
save "$files/amenities_classification_DD", replace

* Method 3: index ranking
import excel using "$tables\amenity_value_gender.xlsx", sheet(method3) firstrow clear
keep  Clause Coef*
rename (Clause Coefstandardized CoefOLS) (clause coef coefols)
drop if inlist( clause, "fe_f", "fe_m", "_cons")
//correlation between LASSO ranking and OLS with FEs (all LASSO selected clauses)//
pwcorr coef coefols
spearman coef coefols
gsort - coef 
gen aux = _n 
gen isfemale_amenity_DD_m3 = (aux<=20)
gsort + coef 
drop aux
gen aux = _n 
gen ismale_amenity_DD_m3 = (aux<=20)
drop if (isfemale_amenity_DD_m3==0)&(ismale_amenity_DD_m3==0)
//correlation between LASSO ranking and OLS with FEs (female and male DD amenities)//
pwcorr coef coefols
spearman coef coefols
keep clause is*
duplicates drop
merge 1:1 clause using "$files/amenities_classification_DD"
drop _merge
save "$files/amenities_classification_DD", replace
//method3_v2
import excel using "$tables\amenity_value_gender.xlsx", sheet(method3_v2) firstrow clear
keep  Clause Coef*
rename (Clause Coefstandardized) (clause coef)
drop if inlist( clause, "fe_f", "fe_m", "_cons")
gsort - coef 
gen aux = _n 
gen isfemale_amenity_DD_m3v2 = (aux<=20)
gsort + coef 
drop aux
gen aux = _n 
gen ismale_amenity_DD_m3v2 = (aux<=20)
drop if (isfemale_amenity_DD_m3v2==0)&(ismale_amenity_DD_m3v2==0)
keep clause is*
duplicates drop
merge 1:1 clause using "$files/amenities_classification_DD"
drop _merge
save "$files/amenities_classification_DD", replace

* Aggregate
foreach xxx of varlist is* {
	replace `xxx' = 0 if `xxx'==.
}
egen cnt_female = rowtotal(isfemale*) 
tab cnt_female if cnt_female>0
egen cnt_male = rowtotal(ismale*) 
tab cnt_male if cnt_male>0
save "$files/amenities_classification_DD", replace


**************************************
*** add clauses info to our panels ***
**************************************

** Baseline: difference in normalized values in the splitsample
use "$files/amenities_classification_DD", clear
levelsof clause if isfemale_amenity_DD_m2==1, local(female)
levelsof clause if ismale_amenity_DD_m2==1, local(male)
local female_am_DD "" 
foreach x of local female {
	local female_am_DD  "`female_am_DD' `x'" 
}
dis "`female_am_DD'"
local male_am_DD "" 
foreach x of local male {
	local male_am_DD  "`male_am_DD' `x'" 
}
dis "`male_am_DD'"
//binary
local female_am_DD_b ""
foreach x of local female {
	di "`x'"
	local female_am_DD_b  = "`female_am_DD_b' " + "b`x'" 
}
dis "`female_am_DD_b'"
local male_am_DD_b "" 
foreach x of local male {
	local male_am_DD_b  = "`male_am_DD_b' " + "b`x'" 
}
dis "`male_am_DD_b'"

// Take new contracts data and add DD definition
use "$files/estab_union_act.dta", clear
cap drop *_D*  
egen female_clauses_DDbase = rowtotal(`female_am_DD')
egen male_clauses_DDbase = rowtotal(`male_am_DD')
gen any_female_cl_DDbase = (female_clauses_DDbase>0) 
gen any_male_cl_DDbase = (male_clauses_DDbase>0)  
gen female_cl_DDbase_sh = female_clauses_DDbase/total_clauses
gen male_cl_DDbase_sh = male_clauses_DDbase/total_clauses
egen female_clauses_DDbase_b = rowtotal(`female_am_DD_b')
egen male_clauses_DDbase_b = rowtotal(`male_am_DD_b')
gen female_cl_DDbase_sh_b = female_clauses_DDbase_b/total_clauses_b
gen male_cl_DDbase_sh_b = male_clauses_DDbase_b/total_clauses_b
replace female_cl_DDbase_sh = 0 if female_cl_DDbase_sh==.
replace male_cl_DDbase_sh = 0 if male_cl_DDbase_sh==.
replace female_cl_DDbase_sh_b = 0 if female_cl_DDbase_sh_b==.
replace male_cl_DDbase_sh_b = 0 if male_cl_DDbase_sh_b==.
save "$files/estab_union_act.dta", replace 
		
// Take filled panel data and add DD definition	
use "$files/filledpanel.dta", clear
cap drop *_D* 
egen female_clauses_DDbase = rowtotal(`female_am_DD')
egen male_clauses_DDbase = rowtotal(`male_am_DD')
gen any_female_cl_DDbase = (female_clauses_DDbase>0) 
gen any_male_cl_DDbase = (male_clauses_DDbase>0)  
gen female_cl_DDbase_sh = female_clauses_DDbase/total_clauses
gen male_cl_DDbase_sh = male_clauses_DDbase/total_clauses
egen female_clauses_DDbase_b = rowtotal(`female_am_DD_b')
egen male_clauses_DDbase_b = rowtotal(`male_am_DD_b')
gen female_cl_DDbase_sh_b = female_clauses_DDbase_b/total_clauses_b
gen male_cl_DDbase_sh_b = male_clauses_DDbase_b/total_clauses_b
replace female_cl_DDbase_sh = 0 if female_cl_DDbase_sh==.
replace male_cl_DDbase_sh = 0 if male_cl_DDbase_sh==.
replace female_cl_DDbase_sh_b = 0 if female_cl_DDbase_sh_b==.
replace male_cl_DDbase_sh_b = 0 if male_cl_DDbase_sh_b==.
save "$files/filledpanel.dta", replace 

** All: clause is picked up regardless of the method
use "$files/amenities_classification_DD", clear
levelsof clause if cnt_female==6, local(female)
levelsof clause if cnt_male==6, local(male)
local female_am_DD "" 
foreach x of local female {
	local female_am_DD  "`female_am_DD' `x'" 
}
dis "`female_am_DD'"
local male_am_DD "" 
foreach x of local male {
	local male_am_DD  "`male_am_DD' `x'" 
}
dis "`male_am_DD'"
//binary
local female_am_DD_b ""
foreach x of local female {
	di "`x'"
	local female_am_DD_b  = "`female_am_DD_b' " + "b`x'" 
}
dis "`female_am_DD_b'"
local male_am_DD_b "" 
foreach x of local male {
	local male_am_DD_b  = "`male_am_DD_b' " + "b`x'" 
}
dis "`male_am_DD_b'"

// Take new contracts data and add DD definition
use "$files/estab_union_act.dta", clear
cap drop *_DDal*  
egen female_clauses_DDall = rowtotal(`female_am_DD')
egen male_clauses_DDall = rowtotal(`male_am_DD')
gen any_female_cl_DDall = (female_clauses_DDall>0) 
gen any_male_cl_DDall = (male_clauses_DDall>0)  
gen female_cl_DDall_sh = female_clauses_DDall/total_clauses
gen male_cl_DDall_sh = male_clauses_DDall/total_clauses
egen female_clauses_DDall_b = rowtotal(`female_am_DD_b')
egen male_clauses_DDall_b = rowtotal(`male_am_DD_b')
gen female_cl_DDall_sh_b = female_clauses_DDall_b/total_clauses_b
gen male_cl_DDall_sh_b = male_clauses_DDall_b/total_clauses_b
replace female_cl_DDall_sh = 0 if female_cl_DDall_sh==.
replace male_cl_DDall_sh = 0 if male_cl_DDall_sh==.
replace female_cl_DDall_sh_b = 0 if female_cl_DDall_sh_b==.
replace male_cl_DDall_sh_b = 0 if male_cl_DDall_sh_b==.
save "$files/estab_union_act.dta", replace 
		
// Take filled panel data and add DD definition	
use "$files/filledpanel.dta", clear
cap drop *_DDal* 
egen female_clauses_DDall = rowtotal(`female_am_DD')
egen male_clauses_DDall = rowtotal(`male_am_DD')
gen any_female_cl_DDall = (female_clauses_DDall>0) 
gen any_male_cl_DDall = (male_clauses_DDall>0)  
gen female_cl_DDall_sh = female_clauses_DDall/total_clauses
gen male_cl_DDall_sh = male_clauses_DDall/total_clauses
egen female_clauses_DDall_b = rowtotal(`female_am_DD_b')
egen male_clauses_DDall_b = rowtotal(`male_am_DD_b')
gen female_cl_DDall_sh_b = female_clauses_DDall_b/total_clauses_b
gen male_cl_DDall_sh_b = male_clauses_DDall_b/total_clauses_b
replace female_cl_DDall_sh = 0 if female_cl_DDall_sh==.
replace male_cl_DDall_sh = 0 if male_cl_DDall_sh==.
replace female_cl_DDall_sh_b = 0 if female_cl_DDall_sh_b==.
replace male_cl_DDall_sh_b = 0 if male_cl_DDall_sh_b==.
save "$files/filledpanel.dta", replace 

** Any: clause is picked up in any of the methods
use "$files/amenities_classification_DD", clear
levelsof clause if cnt_female>=1, local(female)
levelsof clause if cnt_male>=1, local(male)
local female_am_DD "" 
foreach x of local female {
	local female_am_DD  "`female_am_DD' `x'" 
}
dis "`female_am_DD'"
local male_am_DD "" 
foreach x of local male {
	local male_am_DD  "`male_am_DD' `x'" 
}
dis "`male_am_DD'"
//binary
local female_am_DD_b ""
foreach x of local female {
	di "`x'"
	local female_am_DD_b  = "`female_am_DD_b' " + "b`x'" 
}
dis "`female_am_DD_b'"
local male_am_DD_b "" 
foreach x of local male {
	local male_am_DD_b  = "`male_am_DD_b' " + "b`x'" 
}
dis "`male_am_DD_b'"

// Take new contracts data and add DD definition
use "$files/estab_union_act.dta", clear
cap drop *_DDan*  
egen female_clauses_DDany = rowtotal(`female_am_DD')
egen male_clauses_DDany = rowtotal(`male_am_DD')
gen any_female_cl_DDany = (female_clauses_DDany>0) 
gen any_male_cl_DDany = (male_clauses_DDany>0)  
gen female_cl_DDany_sh = female_clauses_DDany/total_clauses
gen male_cl_DDany_sh = male_clauses_DDany/total_clauses
egen female_clauses_DDany_b = rowtotal(`female_am_DD_b')
egen male_clauses_DDany_b = rowtotal(`male_am_DD_b')
gen female_cl_DDany_sh_b = female_clauses_DDany_b/total_clauses_b
gen male_cl_DDany_sh_b = male_clauses_DDany_b/total_clauses_b
replace female_cl_DDany_sh = 0 if female_cl_DDany_sh==.
replace male_cl_DDany_sh = 0 if male_cl_DDany_sh==.
replace female_cl_DDany_sh_b = 0 if female_cl_DDany_sh_b==.
replace male_cl_DDany_sh_b = 0 if male_cl_DDany_sh_b==.
save "$files/estab_union_act.dta", replace 
		
// Take filled panel data and add DD definition	
use "$files/filledpanel.dta", clear
cap drop *_DDan* 
egen female_clauses_DDany = rowtotal(`female_am_DD')
egen male_clauses_DDany = rowtotal(`male_am_DD')
gen any_female_cl_DDany = (female_clauses_DDany>0) 
gen any_male_cl_DDany = (male_clauses_DDany>0)  
gen female_cl_DDany_sh = female_clauses_DDany/total_clauses
gen male_cl_DDany_sh = male_clauses_DDany/total_clauses
egen female_clauses_DDany_b = rowtotal(`female_am_DD_b')
egen male_clauses_DDany_b = rowtotal(`male_am_DD_b')
gen female_cl_DDany_sh_b = female_clauses_DDany_b/total_clauses_b
gen male_cl_DDany_sh_b = male_clauses_DDany_b/total_clauses_b
replace female_cl_DDany_sh = 0 if female_cl_DDany_sh==.
replace male_cl_DDany_sh = 0 if male_cl_DDany_sh==.
replace female_cl_DDany_sh_b = 0 if female_cl_DDany_sh_b==.
replace male_cl_DDany_sh_b = 0 if male_cl_DDany_sh_b==.
save "$files/filledpanel.dta", replace 

log close
